# Title: De-stereotyping Public Performance Evaluation
# Yixin Liu & Chengxin Xu 2022
# International Public Management Journal
# IPMJ Replication file for Study 3

library(ggplot2) # plot
library(pwr) # power
library(ggpubr) # plot
library(effsize) # effect size
library(multiwayvcov) # cluster se
library(lmtest) # cluster se
library(egg) # ggplot(theme_article)
library(cowplot) # ggplot output
library(Rmisc) # plot stat table function
library(stargazer) # latex
library(xtable) # latex
library(broman) # power

####Function####
# figure title box functions
element_textbox <- function(...) {
  el <- element_text(...)
  class(el) <- c("element_textbox", class(el))
  el
}

element_grob.element_textbox <- function(element, ...) {
  text_grob <- NextMethod()
  rect_grob <- element_grob(calc_element("strip.background", theme_bw()))
  
  ggplot2:::absoluteGrob(
    grid::gList(
      element_grob(calc_element("strip.background", theme_bw())),
      text_grob
    ),
    height = grid::grobHeight(text_grob), 
    width = grid::unit(1, "npc")
  )
}

# percentage function
pcentFun <- function(x) {
  res <- na.omit(x)
  100 * (sum(res) / length(res))
}


# Setup working directory
WD = getwd()
setwd(WD)

####Data####
s3<-read.csv("Study3_Liu&Xu_IPMJ.csv")

####Data transformation####
# transform data from individual level to profile level
JE<-data.frame("observation" = 1:400,
               "IndID" = rep(c(1:200),each=2),
               "Profile" = rep(c(1,2),200),
               "perf" = NA, "send" = NA,
               "Demand" = NA,
               "SAT" = NA,
               "student" = rep(c("White","Black"),200))

#####performance####
###for the 1st profile
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),2])
  perf<-which(JE$IndID==i & JE$Profile==1)
  JE$perf[perf]<-resp
}
###for the 2nd profile
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),3])
  perf<-which(JE$IndID==i & JE$Profile==2)
  JE$perf[perf]<-resp
}

#####school choice####
###for the 1st profile
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),4])
  send<-which(JE$IndID==i & JE$Profile==1)
  JE$send[send]<-resp
}
###for the 2nd profile
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),5])
  send<-which(JE$IndID==i & JE$Profile==2)
  JE$send[send]<-resp
}

#####SAT####
# white
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),18])
  SAT<-which(JE$IndID==i & JE$Profile==1)
  JE$SAT[SAT]<-resp
}
# black
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),19])
  SAT<-which(JE$IndID==i & JE$Profile==2)
  JE$SAT[SAT]<-resp
}

#####Demand####
for (i in 1:200){
  resp<-s3[which(s3[,21]==i),20]
  Demand<-which(JE$IndID==i)
  JE$Demand[Demand]<-resp
}

#####covariates####
# gender
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),8])
  sex<-which(JE$IndID==i)
  JE$sex[sex]<-resp
}
# race
for (i in 1:200){
  resp<-s3[which(s3[,21]==i),9]
  race<-which(JE$IndID==i)
  JE$race[race]<-resp
}
# age
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),10])
  age<-which(JE$IndID==i)
  JE$age[age]<-resp
}
# state
for (i in 1:200){
  resp<-s3[which(s3[,21]==i),11]
  state<-which(JE$IndID==i)
  JE$state[state]<-resp
}
# parent
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),22])
  parent<-which(JE$IndID==i)
  JE$parent[parent]<-resp
}
# income
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),13])
  income<-which(JE$IndID==i)
  JE$income[income]<-resp
}
# education
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),14])
  education<-which(JE$IndID==i)
  JE$education[education]<-resp
}
# ideology
for (i in 1:200){
  resp<-as.numeric(s3[which(s3[,21]==i),15])
  ideology<-which(JE$IndID==i)
  JE$ideology[ideology]<-resp
}

####Regression####
JE$student <-as.factor(JE$student)
JE$student = relevel(JE$student, ref="White")

# performance
mod1<-lm(perf~Demand+student+Demand*student, data = JE)
summary(mod1)
vcov_1 <- cluster.vcov(mod1, JE$IndID)
v1<-coeftest(mod1, vcov_1)
v1

mod1.1<-lm(perf~Demand+student+Demand*student
           +SAT+sex+race+age+state+parent+income+education+ideology, 
           data = JE)
summary(mod1.1)
vcov_1.1 <- cluster.vcov(mod1.1, JE$IndID)
v1.1<-coeftest(mod1.1, vcov_1.1)
v1.1

# school choice
mod2<-lm(send~Demand+student+Demand*student, data = JE)
summary(mod2)
vcov_2 <- cluster.vcov(mod2, JE$IndID)
v2<-coeftest(mod2, vcov_2)
v2

mod2.1<-lm(send~Demand+student+Demand*student
           +SAT+sex+race+age+state+parent+income+education+ideology, 
           data = JE)
summary(mod2.1)
vcov_2.1 <- cluster.vcov(mod2.1, JE$IndID)
v2.1<-coeftest(mod2.1, vcov_2.1)
v2.1

####Table 5####
stargazer(mod1,mod1.1,mod2,mod2.1,
          style = "apsr", type="html", out="Table5.html",
          se=list(v1[,2],v1.1[,2],v2[,2],v2.1[,2]),
          p=list(v1[,4],v1.1[,4],v2[,4],v2.1[,4]),
          covariate.labels=c("DemandxBlack","Demand",
                             "Students' major Race: Black"),
          column.labels=c("Perceived Performance",
                          "Perceived Performance (covariates)",
                          "School Choice",
                          "School Choice (covariates)"),
          omit = c("sex","race","age","state","parent","income",
                   "education","ideology","SAT"),
          dep.var.labels.include = FALSE,
          intercept.bottom=TRUE,
          order = c("DemandTreatment:studentBlack","DemandTreatment","studentBlack"),
          add.lines = list(c("Covariates", 
                             "No","Yes","No","Yes")),
          keep.stat=c("adj.rsq","n"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          align=TRUE,no.space=TRUE)

####Appendix C####
#####C2####
##Characteristics of sample

# gender dummy
s3$female<-ifelse(s3$sex==0,1,0)
# race
s3$white<-ifelse(s3$race=="white",1,0)
s3$black<-ifelse(s3$race=="black",1,0)
s3$his<-ifelse(s3$race=="hispanic",1,0)
s3$asian<-ifelse(s3$race=="asian",1,0)
s3$other<-ifelse(s3$race=="other",1,0)
# age
s3$young<-ifelse(s3$age<=29,1,0)
s3$midage<-ifelse(s3$age>29 & s3$age<=49,1,0)
s3$old<-ifelse(s3$age>49,1,0)
# income
s3$low<-ifelse(s3$income==1,1,0)
s3$med<-ifelse(s3$income>=2 & s3$income<5,1,0)
s3$high<-ifelse(s3$income>=5,1,0)
# education
s3$hed<-ifelse(s3$education>=5,1,0)
# ideology
s3$con<-ifelse(s3$ideology>=4,1,0)
s3$lib<-ifelse(s3$ideology<=2,1,0)
s3$mod<-ifelse(s3$ideology==3,1,0)

# subgroup control and treatment
s3.control<-subset(s3, rand=="Control")
s3.treatment<-subset(s3, rand=="Treatment")

## ANOVA F test (randomization check)
# gender
female.aov <- aov(female ~ rand, data = s3)
summary(female.aov)
female<-summary(female.aov)[[1]][,5][1]
male.aov <- aov(sex ~ rand, data = s3)
summary(male.aov)
male<-summary(male.aov)[[1]][,5][1]
# race
wht.aov <- aov(white ~ rand, data = s3)
summary(wht.aov)
wht<-summary(wht.aov)[[1]][,5][1]
blk.aov <- aov(black ~ rand, data = s3)
summary(blk.aov)
blk<-summary(blk.aov)[[1]][,5][1]
his.aov <- aov(his ~ rand, data = s3)
summary(his.aov)
his<-summary(his.aov)[[1]][,5][1]
asian.aov <- aov(asian ~ rand, data = s3)
summary(asian.aov)
asian<-summary(asian.aov)[[1]][,5][1]
other.aov <- aov(other ~ rand, data = s3)
summary(other.aov)
other<-summary(other.aov)[[1]][,5][1]
# age
young.aov <- aov(young ~ rand, data = s3)
summary(young.aov)
young<-summary(young.aov)[[1]][,5][1]
mid.aov <- aov(midage ~ rand, data = s3)
summary(mid.aov)
midage<-summary(mid.aov)[[1]][,5][1]
old.aov <- aov(old ~ rand, data = s3)
summary(old.aov)
old<-summary(old.aov)[[1]][,5][1]
# income
low.aov <- aov(low ~ rand, data = s3)
summary(low.aov)
low<-summary(low.aov)[[1]][,5][1]
med.aov <- aov(med ~ rand, data = s3)
summary(med.aov)
med<-summary(med.aov)[[1]][,5][1]
high.aov <- aov(high ~ rand, data = s3)
summary(high.aov)
high<-summary(high.aov)[[1]][,5][1]
# education
hed.aov <- aov(hed ~ rand, data = s3)
summary(hed.aov)
hed<-summary(hed.aov)[[1]][,5][1]
# parent
parent.aov <- aov(par ~ rand, data = s3)
summary(parent.aov)
parent<-summary(parent.aov)[[1]][,5][1]
# ideology
lib.aov <- aov(lib ~ rand, data = s3)
summary(lib.aov)
lib<-summary(lib.aov)[[1]][,5][1]
con.aov <- aov(con ~ rand, data = s3)
summary(con.aov)
con<-summary(con.aov)[[1]][,5][1]
mod.aov <- aov(mod ~ rand, data = s3)
summary(mod.aov)
mod<-summary(mod.aov)[[1]][,5][1]

#####Table C1####
des_stat<-matrix(NA,18,7)
rownames(des_stat)<-c("Female","Male",
                      "White","Black","Hispanic","Asian","Other",
                      "Age: 18-29","Age: 30-49","Age: >= 50",
                      "Income: < $25k",
                      "Income: $25k to $75k",
                      "Income: >= $75k",
                      "College degree",
                      "Conservative","Liberal","Moderate",
                      "Parenthood")
colnames(des_stat)<-c("Frequency","%","Frequency","%",
                      "Frequency","%","P-value")

tol.sum<-round(apply(s3[,c(24,8,25:39,22)],
                     2,sum,na.rm=T),digits=0)
treat.sum<-round(apply(s3.treatment[,c(24,8,25:39,22)],
                       2,sum,na.rm=T),digits=0)
control.sum<-round(apply(s3.control[,c(24,8,25:39,22)],
                         2,sum,na.rm=T),digits=0)

tol.p<-round(apply(s3[,c(24,8,25:39,22)],
                   2,pcentFun),digits=0)
treat.p<-round(apply(s3.treatment[,c(24,8,25:39,22)],
                     2,pcentFun),digits=0)
control.p<-round(apply(s3.control[,c(24,8,25:39,22)],
                       2,pcentFun),digits=0)

des_stat[,1]<-tol.sum
des_stat[,3]<-treat.sum
des_stat[,5]<-control.sum
des_stat[,2]<-tol.p
des_stat[,4]<-treat.p
des_stat[,6]<-control.p
des_stat[,7]<-round(c(female,male,wht,blk,his,asian,other,
                      young,midage,old,low,med,high,hed,
                      con,lib,mod,parent),2)

c1<-xtable(x=des_stat, digits = c(0,0,0,0,0,0,0,2))
#print.xtable(C1,type="html",file="c1.html")
